Skip to content

Commit

Permalink
Merge 9bd51df into 09172ec
Browse files Browse the repository at this point in the history
  • Loading branch information
mlin committed Aug 11, 2019
2 parents 09172ec + 9bd51df commit 5c01be8
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 2 deletions.
2 changes: 2 additions & 0 deletions WDL/Expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -942,6 +942,8 @@ def _eval(
if isinstance(innard_value, Value.Pair):
assert self.member in ["left", "right"]
return innard_value.value[0 if self.member == "left" else 1]
if isinstance(innard_value, Value.Struct):
return innard_value.value[self.member]
raise NotImplementedError()

@property
Expand Down
50 changes: 48 additions & 2 deletions WDL/runtime/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import pickle
from typing import Optional, List, Set, Tuple, NamedTuple, Dict, Union, Iterable, Callable, Any
from .. import Env, Type, Value, Tree, StdLib
from ..Error import InputError
from .._util import write_values_json, provision_run_dir
from .task import run_local_task
from .error import TaskFailure
Expand Down Expand Up @@ -316,7 +317,7 @@ def _do_job(
pass
if v is None:
if job.node.expr:
v = job.node.expr.eval(env, stdlib=stdlib)
v = job.node.expr.eval(env, stdlib=stdlib).coerce(job.node.type)
else:
assert job.node.type.optional
v = Value.Null()
Expand All @@ -333,8 +334,14 @@ def _do_job(
# check workflow inputs for additional inputs supplied to this call
for b in self.inputs.enter_namespace(job.node.name):
call_inputs = call_inputs.bind(b.name, b.value)
# issue CallInstructions
# coerce inputs to required types
assert isinstance(job.node.callee, (Tree.Task, Tree.Workflow))
callee_inputs = job.node.callee.available_inputs
call_inputs = call_inputs.map(
lambda b: Env.Binding(b.name, b.value.coerce(callee_inputs[b.name].type))
)
_check_call_input_files(self, job.node.name, env, call_inputs)
# issue CallInstructions
self.logger.warning("issue %s on %s", job.id, job.node.callee.name)
inplog = json.dumps(self.values_to_json(call_inputs))
self.logger.info("input %s <- %s", job.id, inplog if len(inplog) < 4096 else "(large)")
Expand Down Expand Up @@ -511,6 +518,45 @@ def _gather(
return ans


def _host_filenames(env: Env.Bindings[Value.Base]) -> Set[str]:
    """
    Gather the host filenames carried by every File value in the environment, looking
    inside compound values by descending through each value's ``children``.
    """

    def file_paths(value: Value.Base) -> Iterable[str]:
        # Yield this value's own filename (if it is a File), then those of its descendants.
        if isinstance(value, Value.File):
            yield value.value
        for child in value.children:
            yield from file_paths(child)

    return {path for binding in env for path in file_paths(binding.value)}


def _check_call_input_files(
    self: StateMachine,
    call_name: str,
    env: Env.Bindings[Value.Base],
    inputs: Env.Bindings[Value.Base],
) -> None:
    """
    Security check that all input Files in a call's inputs are either in the workflow inputs
    or a previous call's outputs; impedes access to arbitrary host files not supplied as inputs
    nor newly generated.

    ``self`` is the workflow state machine; its ``inputs``, ``finished``, ``jobs`` and
    ``job_outputs`` are read to build the set of allowed filenames. ``call_name`` is used only
    in the error message. ``env`` is accepted but not consulted by this check. ``inputs`` are
    the call inputs to be vetted.

    Raises InputError naming (up to ten of) the offending filenames if any File among the call
    inputs is not in the allowed set.
    """

    # Files supplied as workflow inputs are always allowed...
    allowed_filenames = _host_filenames(self.inputs)
    # ...as are files appearing in the outputs of calls that have already finished.
    for job in self.finished:
        if isinstance(self.jobs[job].node, Tree.Call):
            allowed_filenames |= _host_filenames(self.job_outputs[job])

    disallowed_filenames = _host_filenames(inputs) - allowed_filenames
    if disallowed_filenames:
        # Sort before truncating: set iteration order is arbitrary, so without this the
        # filenames reported (and their order) could differ from run to run.
        reported = ", ".join(sorted(disallowed_filenames)[:10])
        raise InputError(f"call {call_name} inputs use unknown file(s): {reported}")


def run_local_workflow(
workflow: Tree.Workflow,
posix_inputs: Env.Bindings[Value.Base],
Expand Down
101 changes: 101 additions & 0 deletions tests/test_6workflowrun.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,3 +546,104 @@ def test_subworkflow(self):
""", {"n": 3})
self.assertEqual(outputs["sums"], [1, 5, 14])
self.assertEqual(outputs["sum"], 20)

def test_host_file_access(self):
# Exercises the workflow runner's refusal to pass a call any File that is neither a workflow
# input nor an output of an earlier call. Two negative cases are followed by a positive control.
# NOTE(review): _test_workflow presumably returns the raised exception when
# expected_exception is given (its result is stringified below) -- confirm against the helper.

# Negative case 1: a workflow-level File declaration is initialized from a string literal
# naming a host path and handed directly to a call; an InputError is expected.
exn = self._test_workflow("""
version 1.0
workflow hacker9000 {
input {
}
File your_passwords = "/etc/passwd"
call tweet_file { input: file = your_passwords }
}
task tweet_file {
input {
File file
}
command {
cat ~{file}
}
}
""", expected_exception=WDL.Error.InputError)
self.assertTrue("inputs use unknown file" in str(exn))

# Negative case 2: the host path is smuggled in as a String. A task echoes it to stdout,
# returns it inside a struct as Array[String], and the workflow then feeds each string to a
# File-typed call input. An InputError is expected here too.
exn = self._test_workflow("""
version 1.0
struct Box {
Array[String] str
}
workflow hacker9000 {
input {
}
call sneaky
scatter (s in sneaky.box.str) {
call tweet_file { input: file = s }
}
}
task sneaky {
command {
echo "/etc/passwd"
}
output {
Box box = object {
str: read_lines(stdout())
}
}
}
task tweet_file {
input {
File file
}
command {
cat ~{file}
}
}
""", expected_exception=WDL.Error.InputError)
self.assertTrue("inputs use unknown file" in str(exn))

# positive control: files nested in a struct are accepted when they come from the workflow
# inputs (allowed.txt, written under self._dir below) or from a prior call's output
# (the stdout file of task hello).
with open(os.path.join(self._dir, "allowed.txt"), "w") as outfile:
outfile.write("yo")
outputs = self._test_workflow("""
version 1.0
struct Box {
Array[File] str
}
workflow hacker8999 {
input {
Box box
}
call hello
scatter (b in [box, hello.box]) {
Array[File] str = b.str
}
scatter (f in flatten(str)) {
call tweet_file { input: file = f }
}
output {
Array[String] tweets = tweet_file.tweet
}
}
task hello {
command {
echo "Hello, world!"
}
output {
Box box = object {
str: [stdout()]
}
}
}
task tweet_file {
input {
File file
}
command {
cat ~{file}
}
output {
String tweet = read_string(stdout())
}
}
""", inputs={"box": { "str": [os.path.join(self._dir, "allowed.txt")] }})
# Expected tweets: the contents of the input file first, then the contents of hello's stdout.
self.assertEqual(outputs["tweets"], ["yo", "Hello, world!\n"])

0 comments on commit 5c01be8

Please sign in to comment.