chanzuckerberg · mlin · Jul 26, 2021 · Jul 22, 2021 · Jul 24, 2021 · Jul 24, 2021
diff --git a/WDL/CLI.py b/WDL/CLI.py
@@ -551,6 +551,14 @@ def fill_run_subparser(subparsers):
         action="store_true",
         help="override any configuration enabling cache lookup for call outputs & downloaded files",
     )
+    group.add_argument(
+        "--env",
+        action="append",
+        metavar="VARNAME[=VALUE]",
+        type=str,
+        help="Environment variable to pass through to [or set outright in]"
+        " all task environments (portability warning: non-standard side channel)",
+    )
     group.add_argument(
         "--copy-input-files",
         action="store_true",
@@ -583,6 +591,7 @@ def runner(
     cfg=None,
     runtime_cpu_max=None,
     runtime_memory_max=None,
+    env=[],
     runtime_defaults=None,
     max_tasks=None,
     copy_input_files=False,
@@ -661,6 +670,8 @@ def runner(
                     cfg_overrides["task_runtime"]["defaults"] = infile.read()
         if runtime_cpu_max is not None:
             cfg_overrides["task_runtime"]["cpu_max"] = runtime_cpu_max
+        if env:
+            cfg_overrides["task_runtime"]["env"] = runner_env_override(cfg, env)
         if runtime_memory_max is not None:
             runtime_memory_max = (
                 -1 if runtime_memory_max.strip() == "-1" else parse_byte_size(runtime_memory_max)
@@ -672,6 +683,10 @@ def runner(
 
         cfg.override(cfg_overrides)
         cfg.log_all()
+        if cfg["task_runtime"].get_dict("env"):
+            logger.warning(
+                "--env is a non-standard side channel; relying on it is probably not portable"
+            )
 
         # check root
         if not path_really_within((run_dir or os.getcwd()), cfg["file_io"]["root"]):
@@ -1096,6 +1111,20 @@ def bold(line):
         print(line, file=sys.stderr)
 
 
+def runner_env_override(cfg, args):
+    """Merge --env VARNAME[=VALUE] items into the configured task_runtime env dict."""
+    env_override = cfg["task_runtime"].get_dict("env")
+    for item in args:
+        # partition() splits on the first "=" only, so VALUE may itself contain "="
+        name, sep, value = item.partition("=")
+        if not name:
+            # reject "=VALUE" and the empty string: a variable name is required
+            raise Error.InputError("invalid --env argument: " + item)
+        # bare VARNAME (no "=") maps to None, the config's marker for pass-through
+        env_override[name] = value if sep else None
+    return env_override
+
+
 def is_constant_expr(expr):
     """
     Decide if the expression is "constant" for the above purposes

diff --git a/WDL/runtime/config_templates/default.cfg b/WDL/runtime/config_templates/default.cfg
@@ -95,6 +95,14 @@ as_user = false
 # line of defense against code injection into task command scripts. (As this is a blunt tool, it's
 # preferable to validate inputs before invoking miniwdl, if possible.)
 placeholder_regex = (.|\n)*
+# Set environment variable(s) in all task environments. The JSON object is keyed by environment
+# variable name. If a variable is set to null here, then miniwdl passes through the variable from
+# its own environment (if defined there). Any other value is used as a string.
+# --env (New in v1.2.2)
+# Warning: this is a non-standard side channel and relying on it is probably not portable to other
+# WDL engines and/or compute platforms. Explicit WDL task inputs are usually better, except for a
+# few cases like auth tokens for platform-specific tasks.
+env = {}
 
 
 [download_cache]

diff --git a/WDL/runtime/task.py b/WDL/runtime/task.py
@@ -532,6 +532,34 @@ def _eval_task_runtime(
 
     if ans:
         logger.info(_("effective runtime", **ans))
+
+    env_vars_override = {}
+    env_vars_skipped = []
+    for ev_name, ev_value in cfg["task_runtime"].get_dict("env").items():
+        if ev_value is None:
+            try:
+                env_vars_override[ev_name] = os.environ[ev_name]
+            except KeyError:
+                env_vars_skipped.append(ev_name)
+        else:
+            env_vars_override[ev_name] = str(ev_value)
+    if env_vars_skipped:
+        logger.warning(
+            _("skipping pass-through of undefined environment variable(s)", names=env_vars_skipped)
+        )
+    if env_vars_override:
+        # usually don't dump values into log, as they may often be auth tokens
+        logger.notice(
+            _(
+                "overriding environment variables (portability warning)",
+                names=list(env_vars_override.keys()),
+            )
+        )
+        logger.debug(
+            _("overriding environment variables (portability warning)", **env_vars_override)
+        )
+        ans["env"] = env_vars_override
+
     unused_keys = list(
         key
         for key in runtime_values

diff --git a/WDL/runtime/task_container.py b/WDL/runtime/task_container.py
@@ -568,6 +568,7 @@ def _run(self, logger: logging.Logger, terminating: Callable[[], bool], command:
                 "groups": groups,
                 "labels": {"miniwdl_run_id": self.run_id},
                 "container_labels": {"miniwdl_run_id": self.run_id},
+                "env": [f"{k}={v}" for (k, v) in self.runtime_values.get("env", {}).items()],
             }
             kwargs.update(self.create_service_kwargs or {})
             logger.debug(_("docker create service kwargs", **kwargs))

diff --git a/tests/runner.t b/tests/runner.t
@@ -11,7 +11,7 @@ source tests/bash-tap/bash-tap-bootstrap
 export PYTHONPATH="$SOURCE_DIR:$PYTHONPATH"
 miniwdl="python3 -m WDL"
 
-plan tests 78
+plan tests 80
 
 $miniwdl run_self_test
 is "$?" "0" "run_self_test"
@@ -451,3 +451,23 @@ $miniwdl run inside/inside.wdl
 is "$?" "0" "outside import allowed"
 $miniwdl run inside/inside.wdl --no-outside-imports
 is "$?" "2" "outside import denied"
+
+# test --env: WWW is unset (skipped), XXX passes through, YYY= forces empty, ZZZ is set outright
+cat << 'EOF' > env.wdl
+version development
+task t {
+    input {}
+    command <<<
+        echo "${WWW}/${XXX}/${YYY}/${ZZZ}"
+    >>>
+    output {
+        String out = read_string(stdout())
+    }
+    runtime {
+        docker: "ubuntu:20.04"
+    }
+}
+EOF
+XXX=quick YYY=not $miniwdl run env.wdl --env WWW --env XXX --env YYY= --env "ZZZ=brown fox" -o env_out.json
+is "$?" "0" "--env succeeds"
+is "$(jq -r '.outputs["t.out"]' env_out.json)" "/quick//brown fox" "--env correct"
diff --git a/tests/test_7runner.py b/tests/test_7runner.py
@@ -10,6 +10,7 @@
 import platform
 from testfixtures import log_capture
 from .context import WDL
+from unittest.mock import patch
 
 class RunnerTestCase(unittest.TestCase):
     """
@@ -984,3 +985,50 @@ def test_task(self):
         self.assertEqual(outp["results"], ["AliceBas", "Bas"])
         outp = self._run(caller, {"a": "Alyssa"})
         self.assertEqual(outp["results"], ["Alyssa", None])
+
+
+class TestPassthruEnv(RunnerTestCase):
+    def test1(self):
+        # a None value is passed through from os.environ; any other value is set outright
+        wdl = """
+        version development
+        task t {
+            input {
+                String k1
+            }
+            command <<<
+                echo ~{k1}
+                echo "$TEST_ENV_VAR"
+                echo "$SET_ENV_VAR"
+                echo "$NOT_PASSED_IN_VAR"
+            >>>
+            output {
+                String out = read_string(stdout())
+            }
+            runtime {
+                docker: "ubuntu:20.04"
+            }
+        }
+        """
+        cfg = WDL.runtime.config.Loader(logging.getLogger(self.id()), [])
+        cfg.override({"task_runtime": {"env": {"TEST_ENV_VAR": None, "SET_ENV_VAR": "set123"}}})
+        out = self._run(wdl, {"k1": "stringvalue"}, cfg=cfg)
+        self.assertEqual(out["out"], """stringvalue
+
+set123
+""",
+        )
+        # NOT_PASSED_IN_VAR is not named in the env config, so it must not reach the task
+        env = {
+            "TEST_ENV_VAR": "passthru_test_success",
+            "NOT_PASSED_IN_VAR": "this shouldn't be passed in",
+        }
+        with patch.dict("os.environ", env):
+            out = self._run(wdl, {"k1": "stringvalue"}, cfg=cfg)
+        self.assertEqual(
+            out["out"],
+            """stringvalue
+passthru_test_success
+set123
+""",
+        )