Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make inputs with defaults implicitly optional #507

Merged
merged 6 commits into from
Jun 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions WDL/CLI.py
Original file line number Diff line number Diff line change
Expand Up @@ -1032,6 +1032,8 @@ def runner_input_json_file(available_inputs, namespace, input_file, downloadable
.source_text
)
input_json = yaml.safe_load(input_json)
if not isinstance(input_json, dict):
raise Error.InputError("check JSON input; expected top-level object")
try:
ans = values_from_json(input_json, available_inputs, namespace=namespace)
except Error.InputError as exn:
Expand Down
44 changes: 23 additions & 21 deletions WDL/Lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,8 +520,10 @@ def decl(self, obj: Tree.Decl) -> Any:
def call(self, obj: Tree.Call) -> Any:
for name, inp_expr in obj.inputs.items():
decl = _find_input_decl(obj, name)
if not inp_expr.type.coerces(decl.type, check_quant=True) and not _is_array_coercion(
decl.type, inp_expr.type
# treat input with default as optional, with or without the ? type quantifier
decltype = decl.type.copy(optional=True) if decl.expr else decl.type
if not inp_expr.type.coerces(decltype, check_quant=True) and not _is_array_coercion(
decltype, inp_expr.type
):
msg = "input {} {} = :{}:".format(str(decl.type), decl.name, str(inp_expr.type))
self.add(obj, msg, inp_expr.pos)
Expand Down Expand Up @@ -819,32 +821,32 @@ def call(self, obj: Tree.Call) -> Any:
@a_linter
class UnnecessaryQuantifier(Linter):
# A declaration like T? x = :T: where the right-hand side can't be null.
# The optional quantifier is unnecessary except within a task/workflow
# input section (where it denotes that the default value can be overridden
# by expressly passing null). Another exception is File? outputs of tasks,
# e.g. File? optional_file_output = "filename.txt"

# Caveats:
# 1. Exception for File? output of tasks, where this is normal.
# 2. Specific warning when x is an input, and the interpretation is underspecified by WDL
# (called with None, does the binding take None or the default?)
def decl(self, obj: Tree.Decl) -> Any:
if obj.type.optional and obj.expr and not obj.expr.type.optional:
tw = obj
while not isinstance(tw, (Tree.Task, Tree.Workflow)):
tw = getattr(tw, "parent")
assert isinstance(tw, (Tree.Task, Tree.Workflow))
if (
isinstance(tw.inputs, list)
and obj not in tw.inputs
and not (
isinstance(tw, Tree.Task)
and isinstance(obj.type, (Type.File, Type.Directory))
and obj in tw.outputs
)
if not (
isinstance(tw, Tree.Task)
and isinstance(obj.type, (Type.File, Type.Directory))
and obj in tw.outputs
):
self.add(
obj,
"unnecessary optional quantifier (?) for non-input {} {}".format(
obj.type, obj.name
),
)
if not isinstance(tw.inputs, list) or obj in tw.inputs:
self.add(
obj,
f"input {obj.type} {obj.name} is implicitly optional since it has a default;"
" consider removing ? quantifier, which may not behave consistently between WDL interpreters",
)
else:
self.add(
obj,
f"unnecessary optional quantifier (?) for non-input {obj.type} {obj.name}",
)


_shellcheck_available = None
Expand Down
6 changes: 4 additions & 2 deletions WDL/Tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,10 +613,12 @@ def typecheck_input(
for name, expr in self.inputs.items():
try:
decl = self.callee.available_inputs[name]
# treat input with default as optional, with or without the ? type quantifier
decltype = decl.type.copy(optional=True) if decl.expr else decl.type
errors.try1(
lambda expr=expr, decl=decl: expr.infer_type(
lambda expr=expr, decltype=decltype: expr.infer_type(
type_env, stdlib, check_quant=check_quant, struct_types=struct_types
).typecheck(decl.type)
).typecheck(decltype)
)
except KeyError:
errors.append(Error.NoSuchInput(expr, name))
Expand Down
3 changes: 2 additions & 1 deletion WDL/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,8 @@ def values_from_json(
if not ty:
raise Error.InputError("unknown input/output: " + key) from None
if isinstance(ty, Tree.Decl):
ty = ty.type
# treat input with default as optional, with or without the ? type quantifier
ty = ty.type.copy(optional=True) if ty.expr else ty.type

assert isinstance(ty, Type.Base)
try:
Expand Down
13 changes: 13 additions & 0 deletions WDL/runtime/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,19 @@ def _eval_task_inputs(
posix_inputs: Env.Bindings[Value.Base],
container: "runtime.task_container.TaskContainer",
) -> Env.Bindings[Value.Base]:
# Preprocess inputs: if None value is supplied for an input declared with a default but without
# the ? type quantifier, remove the binding entirely so that the default will be used. In
# contrast, if the input declaration has an -explicitly- optional type, then we'll allow the
# supplied None to override any default.
input_decls = task.available_inputs
posix_inputs = posix_inputs.filter(
lambda b: not (
isinstance(b.value, Value.Null)
and b.name in input_decls
and input_decls[b.name].expr
and not input_decls[b.name].type.optional
)
)

# Map all the provided input File & Directory paths to in-container paths
container.add_paths(_fspaths(posix_inputs))
Expand Down
35 changes: 29 additions & 6 deletions WDL/runtime/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,22 +162,37 @@ def __init__(

self.values_to_json = values_to_json # pyre-ignore

# Preprocess inputs: if None value is supplied for an input declared with a default but
# without the ? type quantifier, remove the binding entirely so that the default will be
# used. In contrast, if the input declaration has an -explicitly- optional type, then we'll
# allow the supplied None to override any default.
input_decls = workflow.available_inputs
self.inputs = self.inputs.filter(
lambda b: not (
isinstance(b.value, Value.Null)
and b.name in input_decls
and input_decls[b.name].expr
and not input_decls[b.name].type.optional
)
)

workflow_nodes = (workflow.inputs or []) + workflow.body + (workflow.outputs or [])
workflow_nodes.append(WorkflowOutputs(workflow))

# TODO: by topsorting all section bodies we can ensure that when we schedule an additional
# job, all its dependencies will already have been scheduled, increasing
# flexibility/compatibility with various backends.
for node in workflow_nodes:
deps = node.workflow_node_dependencies
if isinstance(node, Tree.Decl):
# strike the dependencies of any decl node whose value is supplied in the inputs
if inputs.has_binding(node.name):
if self.inputs.has_binding(node.name):
deps = set()
self._schedule(
_Job(id=node.workflow_node_id, node=node, dependencies=deps, scatter_stack=[])
)

# TODO: by topsorting all section bodies we could ensure that when we schedule an
# additional job, all its dependencies will already have been scheduled, increasing
# flexibility/compatibility with various backends.

# sanity check
assert "outputs" in self.jobs
known_jobs = set(self.waiting)
Expand Down Expand Up @@ -366,14 +381,22 @@ def _do_job(
# check workflow inputs for additional inputs supplied to this call
for b in self.inputs.enter_namespace(job.node.name):
call_inputs = call_inputs.bind(b.name, b.value)
# coerce inputs to required types

# coerce inputs to required types (treating inputs with defaults as optional even if
# they don't have the ? type quantifier)
assert isinstance(job.node.callee, (Tree.Task, Tree.Workflow))
callee_inputs = job.node.callee.available_inputs
call_inputs = call_inputs.map(
lambda b: Env.Binding(
b.name,
(
b.value.coerce(callee_inputs[b.name].type)
b.value.coerce(
(
callee_inputs[b.name].type.copy(optional=True)
if callee_inputs[b.name].expr
else callee_inputs[b.name].type
)
)
if b.name in callee_inputs
else b.value
),
Expand Down
18 changes: 13 additions & 5 deletions tests/test_3corpi.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ class HCAskylab_task(unittest.TestCase):
"MixedIndentation": 1,
"FileCoercion": 1,
"MissingVersion": 34,
"UnnecessaryQuantifier": 3,
},
)
class HCAskylab_workflow(unittest.TestCase):
Expand Down Expand Up @@ -275,6 +276,7 @@ class GTEx(unittest.TestCase):
"UnusedDeclaration": 74,
"OptionalCoercion": 1,
"MissingVersion": 8,
"UnnecessaryQuantifier": 1,
},
check_quant=False,
)
Expand All @@ -292,6 +294,7 @@ class TOPMed(unittest.TestCase):
"UnusedImport": 1,
"SelectArray": 4,
"MissingVersion": 62,
"UnnecessaryQuantifier": 191,
},
)
class ViralNGS(unittest.TestCase):
Expand Down Expand Up @@ -379,6 +382,7 @@ class ENCODE_WGBS(unittest.TestCase):
"StringCoercion": 2,
"UnnecessaryQuantifier": 1,
"MissingVersion": 52,
"UnnecessaryQuantifier": 10,
},
check_quant=False,
)
Expand All @@ -395,7 +399,7 @@ class dxWDL(unittest.TestCase):
"StringCoercion": 6,
"FileCoercion": 3,
"NonemptyCoercion": 1,
"UnnecessaryQuantifier": 2,
"UnnecessaryQuantifier": 5,
"UnusedDeclaration": 2,
"IncompleteCall": 2,
"SelectArray": 1,
Expand All @@ -420,7 +424,7 @@ class Contrived(unittest.TestCase):
"FileCoercion": 5,
"OptionalCoercion": 3,
"NonemptyCoercion": 2,
"UnnecessaryQuantifier": 4,
"UnnecessaryQuantifier": 9,
"UnusedDeclaration": 9,
"IncompleteCall": 3,
"ArrayCoercion": 2,
Expand Down Expand Up @@ -451,6 +455,7 @@ class Contrived2(unittest.TestCase):
"NameCollision": 1,
"SelectArray": 1,
"UnverifiedStruct": 1,
"UnnecessaryQuantifier": 8,
},
)
class BioWDLTasks(unittest.TestCase):
Expand All @@ -460,11 +465,12 @@ class BioWDLTasks(unittest.TestCase):
@wdl_corpus(
["test_corpi/biowdl/aligning/**"],
expected_lint={
"OptionalCoercion": 12,
"OptionalCoercion": 11,
"UnusedDeclaration": 12,
"NonemptyCoercion": 1,
"NameCollision": 1,
"UnverifiedStruct": 1
"UnverifiedStruct": 1,
"UnnecessaryQuantifier": 13,
},
check_quant=False,
)
Expand All @@ -480,6 +486,7 @@ class BioWDLAligning(unittest.TestCase):
"NonemptyCoercion": 3,
"NameCollision": 1,
"UnverifiedStruct": 1,
"UnnecessaryQuantifier": 9,
},
check_quant=False,
)
Expand All @@ -495,6 +502,7 @@ class BioWDLExpressionQuantification(unittest.TestCase):
"UnusedDeclaration": 11,
"NonemptyCoercion": 37,
"SelectArray": 5,
"UnnecessaryQuantifier": 3,
},
check_quant=False,
)
Expand All @@ -507,10 +515,10 @@ class BioWDLSomaticVariantCalling(unittest.TestCase):
expected_lint={
"UnusedDeclaration": 8,
"SelectArray": 2,
"OptionalCoercion": 2,
"NonemptyCoercion": 3,
"UnusedCall": 1,
"UnverifiedStruct": 1,
"UnnecessaryQuantifier": 7,
},
check_quant=False,
)
Expand Down
62 changes: 62 additions & 0 deletions tests/test_7runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -922,3 +922,65 @@ def test_docker(self):
assert sum("18.04" in msg for msg in outputs["results"]) == 2
outputs = self._run(caller, {"message": "hello", "docker": "ubuntu:focal"})
assert sum("20.04" in msg for msg in outputs["results"]) == 2


class TestImplicitlyOptionalInputWithDefault(RunnerTestCase):
def test_workflow(self):
src = R"""
version 1.1
workflow contrived {
input {
String a = "Alice" + select_first([b, "Carol"])
String? b = "Bob"
}
output {
Array[String?] results = [a, b]
}
}
"""
outp = self._run(src, {})
self.assertEqual(outp["results"], ["AliceBob", "Bob"])
outp = self._run(src, {"a": "Alyssa"})
self.assertEqual(outp["results"], ["Alyssa", "Bob"])
outp = self._run(src, {"b": "Bas"})
self.assertEqual(outp["results"], ["AliceBas", "Bas"])
outp = self._run(src, {"b": None})
self.assertEqual(outp["results"], ["AliceCarol", None])
outp = self._run(src, {"a": None, "b": None})
self.assertEqual(outp["results"], ["AliceCarol", None])

def test_task(self):
caller = R"""
version 1.1
workflow caller {
input {
String? a
String? b
}
call contrived {
input:
a = a, b = b
}
output {
Array[String?] results = contrived.results
}
}
task contrived {
input {
String a = "Alice" + select_first([b, "Carol"])
String? b = "Bob"
}
command {}
output {
Array[String?] results = [a, b]
}
}
"""
outp = self._run(caller, {})
self.assertEqual(outp["results"], ["AliceCarol", None])
outp = self._run(caller, {"a": None, "b": None})
self.assertEqual(outp["results"], ["AliceCarol", None])
outp = self._run(caller, {"b": "Bas"})
self.assertEqual(outp["results"], ["AliceBas", "Bas"])
outp = self._run(caller, {"a": "Alyssa"})
self.assertEqual(outp["results"], ["Alyssa", None])