Skip to content

Commit

Permalink
WDL Directory type (local input only) (#424)
Browse files Browse the repository at this point in the history
Introduces WDL 2.0 Directory type, for now supported only for non-downloaded inputs
  • Loading branch information
mlin committed Sep 5, 2020
1 parent 4ce66ed commit e691cc9
Show file tree
Hide file tree
Showing 13 changed files with 385 additions and 147 deletions.
16 changes: 15 additions & 1 deletion WDL/CLI.py
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,20 @@ def runner_input_value(s_value, ty, file_found, root):
elif not (file_found and file_found(fn)): # maybe URI
raise Error.InputError("File not found: " + fn)
return Value.File(fn)
if isinstance(ty, Type.Directory):
dn = os.path.expanduser(s_value)
if os.path.isdir(dn):
dn = os.path.abspath(dn)
if not path_really_within(dn, root):
raise Error.InputError(
f"all input paths must be located within the configured `file_io.root' directory `{root}' "
f"unlike `{dn}'"
)
# TODO: courtesy check for symlinks that have absolute paths or relatively point
# outside the directory
else: # TODO: relax for URIs
raise Error.InputError("Directory not found: " + dn)
return Value.Directory(dn)
if isinstance(ty, Type.Boolean):
if s_value == "true":
return Value.Boolean(True)
Expand Down Expand Up @@ -1241,7 +1255,7 @@ def localize(
doc = load(wdlfile, path or [], check_quant=check_quant, read_source=read_source)

def file_found(fn):
return runtime.download.able(cfg, fn) or os.path.isfile(fn)
return runtime.download.able(cfg, fn) or os.path.exists(fn)

try:
target, input_env, input_json = runner_input(
Expand Down
18 changes: 10 additions & 8 deletions WDL/Lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,9 +188,11 @@ def _compound_coercion(to_type, from_type, base_to_type, extra_from_type=None):
to_type.right_type, from_type.right_type, base_to_type, extra_from_type
)
if isinstance(to_type, base_to_type):
coercible = list(base_to_type)
if extra_from_type:
return not isinstance(from_type, (base_to_type, extra_from_type, Type.Any))
return not isinstance(from_type, (base_to_type, Type.Any))
coercible.append(extra_from_type)
coercible.append(Type.Any)
return not isinstance(from_type, tuple(coercible))
return False


Expand All @@ -215,7 +217,7 @@ def decl(self, obj: Tree.Decl) -> Any:
if obj.expr and _compound_coercion(
obj.type,
obj.expr.type,
Type.String,
(Type.String,),
(Type.File if isinstance(_parent_executable(obj), Tree.Task) else None),
):
self.add(obj, "{} {} = :{}:".format(str(obj.type), obj.name, str(obj.expr.type)))
Expand Down Expand Up @@ -259,7 +261,7 @@ def expr(self, obj: Expr.Base) -> Any:
if _compound_coercion(
F_i,
arg_i.type,
Type.String,
(Type.String,),
(Type.File if isinstance(_parent_executable(obj), Tree.Task) else None),
):
msg = "{} argument of {}() = :{}:".format(
Expand Down Expand Up @@ -287,7 +289,7 @@ def expr(self, obj: Expr.Base) -> Any:
def call(self, obj: Tree.Call) -> Any:
for name, inp_expr in obj.inputs.items():
decl = _find_input_decl(obj, name)
if _compound_coercion(decl.type, inp_expr.type, Type.String):
if _compound_coercion(decl.type, inp_expr.type, (Type.String,)):
msg = "input {} {} = :{}:".format(str(decl.type), decl.name, str(inp_expr.type))
self.add(obj, msg, inp_expr.pos)

Expand Down Expand Up @@ -316,7 +318,7 @@ def decl(self, obj: Tree.Decl) -> Any:
super().decl(obj)
if (
obj.expr
and _compound_coercion(obj.type, obj.expr.type, Type.File)
and _compound_coercion(obj.type, obj.expr.type, (Type.File, Type.Directory))
and not (
isinstance(obj.expr, Expr.String)
and obj.expr.literal
Expand All @@ -334,7 +336,7 @@ def expr(self, obj: Expr.Base) -> Any:
for i in range(min(len(F.argument_types), len(obj.arguments))):
F_i = F.argument_types[i]
arg_i = obj.arguments[i]
if _compound_coercion(F_i, arg_i.type, Type.File):
if _compound_coercion(F_i, arg_i.type, (Type.File, Type.Directory)):
msg = "{} argument of {}() = :{}:".format(str(F_i), F.name, str(arg_i.type))
self.add(obj, msg, arg_i.pos)
elif obj.function_name == "size":
Expand All @@ -354,7 +356,7 @@ def call(self, obj: Tree.Call) -> Any:
super().call(obj)
for name, inp_expr in obj.inputs.items():
decl = _find_input_decl(obj, name)
if _compound_coercion(decl.type, inp_expr.type, Type.File):
if _compound_coercion(decl.type, inp_expr.type, (Type.File, Type.Directory)):
msg = "input {} {} = :{}:".format(str(decl.type), decl.name, str(inp_expr.type))
self.add(obj, msg, inp_expr.pos)

Expand Down
21 changes: 21 additions & 0 deletions WDL/Tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,11 @@ def typecheck(
errors.try1(
lambda: decl.typecheck(type_env, stdlib=stdlib, check_quant=check_quant)
)
if _has_directories(decl.type):
# FIXME
raise Error.ValidationError(
decl, "Directory outputs aren't supported in this version of miniwdl"
)

# check for cyclic dependencies among decls
_detect_cycles(
Expand Down Expand Up @@ -1062,6 +1067,11 @@ def typecheck(self, doc: "Document", check_quant: bool) -> None:
)
)
output_type_env = output_type_env2
if _has_directories(output.type):
# FIXME
raise Error.ValidationError(
output, "Directory outputs aren't supported in this version of miniwdl"
)
# 6. check for cyclic dependencies
_detect_cycles(_workflow_dependency_matrix(self))

Expand Down Expand Up @@ -1804,3 +1814,14 @@ def _add_struct_instance_to_type_env(
else:
ans = ans.bind(namespace + "." + member_name, member_type, ctx)
return ans


def _has_directories(t: Type.Base) -> bool:
    """
    Report whether ``t`` is a Directory type or has one nested anywhere in its parameters.

    Used to check output declarations for Directory types while we don't support them.

    :param t: the type to inspect; each entry of ``t.parameters`` is examined recursively
    :returns: ``True`` if ``t`` is a ``Type.Directory`` or any of its (transitive) parameter
              types is one, ``False`` otherwise
    """
    # any() stops at the first nested Directory and always yields a plain bool, so the answer
    # never depends on the truthiness of a type object
    return isinstance(t, Type.Directory) or any(_has_directories(p) for p in t.parameters)
13 changes: 12 additions & 1 deletion WDL/Type.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,13 +174,24 @@ def coerces(self, rhs: Base, check_quant: bool = True) -> bool:
return super().coerces(rhs, check_quant)


class Directory(Base):
    # The Directory type. The only state set up here is the ``optional`` flag, stored on
    # ``_optional``.

    def __init__(self, optional: bool = False) -> None:
        self._optional = optional

    def coerces(self, rhs: Base, check_quant: bool = True) -> bool:
        ""
        # When rhs is a String the answer is True outright; every other rhs is deferred to
        # the superclass implementation.
        # NOTE(review): String.coerces passes File/Directory through _check_optional, while
        # this String branch never looks at check_quant -- confirm the asymmetry is intended.
        return isinstance(rhs, String) or super().coerces(rhs, check_quant)


class String(Base):
def __init__(self, optional: bool = False) -> None:
self._optional = optional

def coerces(self, rhs: Base, check_quant: bool = True) -> bool:
""
if isinstance(rhs, (File, Int, Float)):
if isinstance(rhs, (File, Directory, Int, Float)):
return self._check_optional(rhs, check_quant)
return super().coerces(rhs, check_quant)

Expand Down
73 changes: 60 additions & 13 deletions WDL/Value.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,13 +145,18 @@ def coerce(self, desired_type: Optional[Type.Base] = None) -> Base:
class String(Base):
"""``value`` has Python type ``str``"""

def __init__(self, value: str, expr: "Optional[Expr.Base]" = None) -> None:
super().__init__(Type.String(), value, expr)
def __init__(
self, value: str, expr: "Optional[Expr.Base]" = None, subtype: Optional[Type.Base] = None
) -> None:
subtype = subtype or Type.String()
super().__init__(subtype, value, expr)

def coerce(self, desired_type: Optional[Type.Base] = None) -> Base:
""
if isinstance(desired_type, Type.File) and not isinstance(self, File):
return File(self.value, self.expr)
if isinstance(desired_type, Type.Directory) and not isinstance(self, Directory):
return Directory(self.value, self.expr)
try:
if isinstance(desired_type, Type.Int):
return Int(int(self.value), self.expr)
Expand All @@ -167,6 +172,11 @@ def coerce(self, desired_type: Optional[Type.Base] = None) -> Base:
class File(String):
"""``value`` has Python type ``str``"""

def __init__(self, value: str, expr: "Optional[Expr.Base]" = None) -> None:
super().__init__(value, expr=expr, subtype=Type.File())
if value != value.rstrip("/"):
raise Error.InputError("WDL.Value.File invalid path: " + value)

def coerce(self, desired_type: Optional[Type.Base] = None) -> Base:
""
if self.value is None:
Expand All @@ -179,6 +189,18 @@ def coerce(self, desired_type: Optional[Type.Base] = None) -> Base:
return super().coerce(desired_type)


class Directory(String):
    """A Directory value; ``value`` has Python type ``str``"""

    def __init__(self, value: str, expr: "Optional[Expr.Base]" = None) -> None:
        # Built exactly like a String, except that the value is tagged with the Directory type
        directory_type = Type.Directory()
        super().__init__(value, expr, directory_type)

    def coerce(self, desired_type: Optional[Type.Base] = None) -> Base:
        ""
        # No Directory-specific handling yet; the String logic decides the result.
        # TODO: similar coercion logic for Directory? outputs when we support those
        coerced = super().coerce(desired_type)
        return coerced


class Array(Base):
"""``value`` is a Python ``list`` of other ``WDL.Value.Base`` instances"""

Expand Down Expand Up @@ -412,6 +434,8 @@ def from_json(type: Type.Base, value: Any) -> Base:
return Float(float(value))
if isinstance(type, Type.File) and isinstance(value, str):
return File(value)
if isinstance(type, Type.Directory) and isinstance(value, str):
return Directory(value)
if isinstance(type, (Type.String, Type.Any)) and isinstance(value, str):
return String(value)
if isinstance(type, Type.Array) and isinstance(value, list):
Expand Down Expand Up @@ -470,28 +494,51 @@ def _infer_from_json(j: Any) -> Base:
raise Error.InputError(f"couldn't construct value from: {json.dumps(j)}")


def rewrite_files(v: Base, f: Callable[[str], str]) -> Base:
def rewrite_paths(v: Base, f: Callable[[Union[File, Directory]], str]) -> Base:
"""
Produce a deep copy of the given Value with all File names rewritten by the given function
(including Files nested inside compound Values).
Produce a deep copy of the given Value with all File & Directory paths (including those nested
inside compound Values) rewritten by the given function.
"""

mapped_files = set()
mapped_paths = set()

def map_files(v2: Base) -> Base:
if isinstance(v2, File):
assert id(v2) not in mapped_files, f"File {id(v2)} reused in deepcopy"
v2.value = f(v2.value)
mapped_files.add(id(v2))
def map_paths(v2: Base) -> Base:
if isinstance(v2, (File, Directory)):
assert id(v2) not in mapped_paths, f"File/Directory {id(v2)} reused in deepcopy"
v2.value = f(v2)
mapped_paths.add(id(v2))
for ch in v2.children:
map_files(ch)
map_paths(ch)
return v2

return map_files(copy.deepcopy(v))
return map_paths(copy.deepcopy(v))


def rewrite_env_paths(
    env: Env.Bindings[Base], f: Callable[[Union[File, Directory]], str]
) -> Env.Bindings[Base]:
    """
    Map the given Value Env to one in which every binding keeps its name and holds the result
    of ``rewrite_paths`` on its value, i.e. a deep copy with all File & Directory paths
    rewritten by ``f``.
    """

    def rewrite_binding(binding):
        # the name is carried over unchanged; only the value is rewritten
        rewritten = rewrite_paths(binding.value, f)
        return Env.Binding(binding.name, rewritten)

    return env.map(rewrite_binding)


def rewrite_files(v: Base, f: Callable[[str], str]) -> Base:
    """
    Deep-copy the given Value, replacing each File name (including those of Files nested
    inside compound Values) with the result of calling ``f`` on it. Directory values keep
    their current path.
    (deprecated: use ``rewrite_paths`` to handle Directory values as well)
    """

    def files_only(fd):
        # rewrite_paths hands over the File/Directory object itself; only Files are passed
        # on to ``f`` (by their string value), anything else keeps its value as-is
        if isinstance(fd, File):
            return f(fd.value)
        return fd.value

    return rewrite_paths(v, files_only)


def rewrite_env_files(env: Env.Bindings[Base], f: Callable[[str], str]) -> Env.Bindings[Base]:
    """
    Map the given Value Env to one in which every binding keeps its name and holds the result
    of ``rewrite_files`` on its value, i.e. a deep copy with all File names rewritten by ``f``.
    (deprecated: use ``rewrite_env_paths`` to handle Directory values as well)
    """

    def rewrite_binding(binding):
        # the name is carried over unchanged; only the value is rewritten
        rewritten = rewrite_files(binding.value, f)
        return Env.Binding(binding.name, rewritten)

    return env.map(rewrite_binding)
4 changes: 2 additions & 2 deletions WDL/_grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@

keywords = {}
keywords["draft-2"] = set(
"Array Float Int Map None Pair String as call command else false if import input left meta object output parameter_meta right runtime scatter task then true workflow".split(
"Array File Float Int Map None Pair String as call command else false if import input left meta object output parameter_meta right runtime scatter task then true workflow".split(
" "
)
)
Expand Down Expand Up @@ -479,7 +479,7 @@
%ignore COMMENT
"""
keywords["development"] = set(
"Array Float Int Map None Pair String alias as call command else false if import input left meta object output parameter_meta right runtime scatter struct task then true workflow".split(
"Array Directory File Float Int Map None Pair String alias as call command else false if import input left meta object output parameter_meta right runtime scatter struct task then true workflow".split(
" "
)
)
Expand Down

0 comments on commit e691cc9

Please sign in to comment.