# Default configuration for the miniwdl runner; see:
# https://miniwdl.readthedocs.io/en/latest/runner_reference.html#configuration
# for guidance on overriding these defaults with a custom .cfg file and/or environment variables.
# While this file exhibits all available configuration options, custom .cfg files need only specify
# the options to override.
#
# This file is organized into sections & options within each section. Some values take the form of
# a JSON object or array, which may span multiple -indented- lines. Except within JSON, string
# values don't need to be in quotes. Environment variable substitutions ($VAR) are supported.
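#
# For example, a minimal custom .cfg overriding a single option might contain just:
#   [scheduler]
#   task_concurrency = 8
# Equivalently (per the documentation linked above), any option can be overridden with an
# environment variable named MINIWDL__SECTION__KEY, e.g. MINIWDL__SCHEDULER__TASK_CONCURRENCY=8
# (the value 8 here is just an illustration).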
[scheduler]
# Thread pool size bounding how many WDL tasks the runner may attempt to execute concurrently;
# actual concurrency may be less due to CPU/memory resource scheduling. This setting may need to be
# increased on multi-node deployments, but (as a guideline) not beyond ~200, to avoid excessive
# overhead in miniwdl's Python process.
# 0 = default to host `nproc`.
# -@
task_concurrency = 0
# task_concurrency applies to URI download tasks too; however, it may be desirable to limit their
# concurrency further still, since they're exceptionally I/O-intensive. In that case set
# download_concurrency to a nonzero value lower than the effective task_concurrency.
# (New in v1.3.1)
download_concurrency = 0
# Thread pool size bounding how many subworkflow calls the runner may attempt to execute
# concurrently (per level of subworkflow call nesting depth).
# 0 = max(task_concurrency, `nproc`)
# (New in v1.5.4)
subworkflow_concurrency = 0
# container backend; docker_swarm (default), singularity, or as added by plug-ins
container_backend = docker_swarm
# When one task fails, immediately terminate all other running tasks. If disabled, stop launching
# new tasks, but leave those still running to succeed or fail on their own. The latter mode might
# be useful with call caching (see below) to avoid discarding all work done by other tasks.
fail_fast = true
[docker_swarm]
# Docker Swarm Mode is the default container backend. It's included with every docker installation,
# but disabled by default. The following option allows miniwdl to automatically initialize the local
# host as a one-node swarm if needed, by performing
# docker swarm init --listen-addr 127.0.0.1 --advertise-addr 127.0.0.1
# This is typically helpful for single-node use, but should be disabled if a preexisting swarm
# is to be used.
auto_init = true
# Period for polling the status of running containers. Increasing this reduces steady-state load on
# miniwdl and dockerd (when many tasks are running concurrently), but also delays noticing that
# tasks have exited and (with --verbose) the appearance of standard-error logs. Worker threads
# randomize the interval +/- 50% to spread out activity.
polling_period_seconds = 1.5
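# For example, at the default of 1.5, each worker thread polls at a random interval between
# roughly 0.75 and 2.25 seconds.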
# Retry idempotent dockerd requests yielding 5xx status code (after polling_period_seconds)
server_error_retries = 2
# Recognize e.g. `docker_network: "host"` in task runtime sections, and associate respective
# containers with the docker network, if the network name appears in this list -- e.g.
# allow_networks = ["host"]
# (New in v1.5.1)
allow_networks = []
[file_io]
# During startup, require the run directory and input files to reside somewhere within this root
# directory. This constraint is needed in cluster deployments relying on a shared mount. It can
# also be useful on a single node to prevent accidental consumption of a small boot/home volume,
# by confining workflow operations to some spacious scratch/data mount.
root = /
# Allow workflows access to any existing file/directory within the root, rather than (default) only
# those the operator expressly supplied through the command line or JSON inputs. Not recommended,
# for security and portability reasons; but enables workflows that derive input filenames at
# runtime, or receive input filenames through a manifest file. (New in v1.7.0)
allow_any_input = false
# Populate task working directory with writable copies of the input files, instead of mounting them
# in situ & read-only. Needed if tasks want to write/move/rename input files, but costs time and
# disk space. --copy-input-files
copy_input_files = false
# Selectively copy_input_files for those tasks whose names appear in this list. (New in v1.3.1)
copy_input_files_for = []
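# e.g. copy_input_files_for = ["bwa_mem", "merge_vcfs"]  (hypothetical task names)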
# Each successful run directory has an "out/" folder containing (by default) a symbolic link to each
# output file in its original working location. If output_hardlinks is true, then out/ is populated
# with hardlinks instead of symlinks. Beware the potential confusion arising from files with
# multiple hardlinks! See also delete_work, below.
output_hardlinks = false
# Delete task working directory upon completion. The task container's working directory is a
# bind-mounted host directory, so files written into it are left behind after the container is torn
# down. If tasks write large non-output files into their working directory (instead of $TMPDIR as
# they should), then it can be useful to delete them automatically.
# Values:
# false = never delete (default)
# success = delete working directories of succeeded tasks
# failure = delete working directories of failed tasks
# always = delete working directories of both succeeded and failed tasks
# The "success" and "always" settings require output_hardlinks, above, to be true; otherwise,
# output files would be deleted too. Input/output JSON, logs, and stdout/stderr are always retained
# in the task run directory (above the container working directory).
delete_work = false
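# For example, a space-saving combination (illustrative, not a recommendation for every workload)
# would be to set output_hardlinks = true together with delete_work = success.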
# Suggest that each task's temporary directory should reside within the mounted task working
# directory, instead of the storage backing the container's root filesystem. The latter (default)
# is usually preferred because the working directory is more likely to reside on slower network-
# attached storage. But mount_tmpdir may be helpful if tasks use large amounts of scratch space and
# the working directory storage has more capacity.
# The exact effect of this setting depends on the container backend implementation: some may change
# where they mount /tmp, some may merely override TMPDIR inside the container, and others may not
# yet implement it at all. (New in v1.5.4)
mount_tmpdir = false
# Selectively mount_tmpdir for those tasks whose names appear in this list. (New in v1.5.4)
mount_tmpdir_for = []
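# e.g. mount_tmpdir_for = ["sort_bam"]  (hypothetical task name)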
[task_runtime]
# Effective maximum values of runtime.cpu and runtime.memory (bytes); evaluated values may be
# rounded down to these maxima in order to "fit" the available host resources. Warning: tasks may
# deadlock if these are set higher than the resources actually achievable.
# 0 = detect host resources, -1 = do not apply a limit.
# --runtime-cpu-max, --runtime-memory-max
cpu_max = 0
memory_max = 0
# A task's runtime.memory is used as a "reservation" to guide container scheduling, but isn't an
# enforced limit unless memory_limit_multiplier is positive, which sets a hard limit of
# memory_limit_multiplier*runtime.memory. Recommendation: if activating this, disable host swap.
memory_limit_multiplier = 0.0
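# For example (illustrative values): with memory_limit_multiplier = 1.25, a task declaring
# runtime.memory = "8G" is scheduled with an 8G reservation and subject to a 10G hard limit.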
# Defaults which each task's runtime{} section will be merged into. --runtime-defaults
defaults = {
"docker": "ubuntu:20.04"
}
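# With the defaults above, for example, a task declaring only `runtime { cpu: 4 }` effectively
# runs with `docker: "ubuntu:20.04"` as well, since these defaults merge underneath the task's own
# runtime values.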
# Defaults applying to URI download tasks specifically. (New in v1.5.1)
download_defaults = {
"cpu": 2,
"memory": "1G"
}
# Run the command script as the invoking user's uid:gid, instead of the usual root. More secure,
# but interferes with commands that assume root access (e.g. apt-get). --as-me
as_user = false
# Fail the evaluation of ${}/~{} placeholders in task command templates, if the interpolated string
# does not match this POSIX regular expression. The default admits anything (including newlines),
# but applications that must process untrusted inputs might set this more restrictively, as one
# line of defense against code injection into task command scripts. (As this is a blunt tool, it's
# preferable to validate inputs before invoking miniwdl, if possible.)
placeholder_regex = (.|\n)*
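# For example, a (hypothetical) stricter policy admitting only a conservative character set on a
# single line:
#   placeholder_regex = [A-Za-z0-9 ,.:/_=+-]*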
# Set environment variable(s) in all task environments. The JSON object is keyed by environment
# variable name. If a variable is set to null here, then miniwdl passes through the variable from
# its own environment (if defined there). Any other value is used as a string.
# --env (New in v1.2.2)
# Warning: this is a non-standard side channel and relying on it is probably not portable to other
# WDL engines and/or compute platforms. Explicit WDL task inputs are usually better, except for a
# few cases like auth tokens for platform-specific tasks.
env = {}
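# e.g. (hypothetical variable names) pass through the host's AWS region and pin another variable:
#   env = {
#       "AWS_DEFAULT_REGION": null,
#       "PIPELINE_ENV": "prod"
#   }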
# If true, recognize `privileged: true` in task runtime sections and run the respective containers
# with privileged capabilities. Not recommended, for security & portability reasons. (New in v1.4.2)
allow_privileged = false
[download_cache]
# When File or Directory inputs are given URIs to be downloaded, store the downloaded copy in a
# local directory where it can later be found and reused for the same input URI.
put = false
# Enable retrieval of File/Directory input URIs from the local cache
get = false
# Base directory for the local download cache. A relative path will be joined to [file_io] root, or
# use $PWD to reference the miniwdl process working directory.
dir = /tmp/miniwdl_download_cache
# Remove URI query strings for cache key/lookup purposes. By default, downloads from URIs with
# query strings are never cached (neither put nor get).
ignore_query = false
# To be eligible for caching (in addition to above options), a URI must (i) match at least one glob
# pattern in enable_patterns, AND (ii) not match any disable_patterns.
enable_patterns = ["*"]
disable_patterns = ["*.php", "*.aspx"]
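# e.g. to cache only objects under one (hypothetical) S3 bucket:
#   enable_patterns = ["s3://my-reference-bucket/*"]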
# flock files in the cache directory while they're in use by a running workflow. This helps
# external cache-cleaning logic avoid disrupting running workflows, but may be unsuitable for
# network file systems with low limits on the number of outstanding flocks.
# (Opt-out new in v1.3.1)
flock = true
[download_aria2c]
# see: https://github.com/chanzuckerberg/miniwdl/tree/main/tools_image
docker = ghcr.io/miniwdl-ext/miniwdl-tools:Id_sha256_73733b619d46f8a3f75229fb6d0b3f61868d159be5fb0ddc3a04613feab9ad18
[download_awscli]
# If workflow inputs include s3:// URIs (or tasks generate them), load AWS credentials using boto3
# on the miniwdl host.
# Note: if running inside EC2, downloader & other tasks might be able to assume an IAM role via the
# instance metadata service, regardless of this setting. You can set the environment variable
# AWS_EC2_METADATA_DISABLED=true to prevent this (or use iptables to block the endpoint IP).
# Recommendation: use only trusted WDL and Docker images, if either host_credentials = true or an
# EC2 instance profile is available.
# Failing all of the above, public S3 URIs can always be used.
host_credentials = false
# see: https://github.com/chanzuckerberg/miniwdl/tree/main/tools_image
docker = ghcr.io/miniwdl-ext/miniwdl-tools:Id_sha256_73733b619d46f8a3f75229fb6d0b3f61868d159be5fb0ddc3a04613feab9ad18
[download_gsutil]
# current version from https://github.com/GoogleCloudPlatform/cloud-sdk-docker/releases
docker = gcr.io/google.com/cloudsdktool/cloud-sdk:398.0.0-slim
[call_cache]
# Register task outputs for potential reuse by subsequent executions of the same task & inputs
put = false
# Enable use of previously-cached outputs
get = false
# Pluggable implementation: the default stores cache JSON files in a local directory, and checks
# posix mtimes of any local files referenced in the cached inputs/outputs (invalidating the cache
# entry if any referenced files were modified or deleted in the meantime).
backend = dir
# A relative path will be joined to [file_io] root, or use $PWD to reference the miniwdl process
# working directory.
dir = ~/.cache/miniwdl
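# e.g. (illustrative) dir = $PWD/miniwdl_call_cache would keep the cache under the process working
# directory, using the $PWD substitution mentioned above.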
[plugins]
# Control which plugins are used. Plugins are installed using the Python entry points convention,
# https://packaging.python.org/specifications/entry-points/
# Furthermore, for a plugin to be used, its "object reference" must (i) match at least one glob
# pattern in enable_patterns, AND (ii) not match any disable_patterns.
enable_patterns = ["*"]
disable_patterns = ["miniwdl_task_omnibus_example:*"]
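# e.g. to additionally disable a (hypothetical) plugin package in its entirety:
#   disable_patterns = ["miniwdl_task_omnibus_example:*", "my_plugin_pkg:*"]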
[singularity]
# Singularity task runtime -- with `singularity` CLI already set up, set
# [scheduler] container_backend = singularity
#
# singularity executable and any desired global options
exe = ["singularity"]
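# e.g. to add singularity's global --quiet flag (assuming your singularity version supports it):
#   exe = ["singularity", "--quiet"]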
# Configuration options to pass to `singularity run` invocations. Defaults for docker-like
# isolation from the host.
run_options = [
"--containall",
"--no-mount", "hostfs",
"--fakeroot"
]
# Directory in which pulled images are stored, to save image construction time when the same task
# is started again. Empty value -> no image cache used.
image_cache =
[udocker]
# udocker task runtime -- with `udocker` CLI already set up, set
# [scheduler] container_backend = udocker
#
# udocker executable and any desired global options
exe = ["udocker", "--allow-root"]
# Configuration options to pass to `udocker run` invocations
run_options = ["--nobanner", "--rm"]