Skip to content

Commit

Permalink
warning for basename collisions amongst output files (#643)
Browse files Browse the repository at this point in the history
  • Loading branch information
mlin committed Jun 18, 2023
1 parent 8def8d6 commit 1bc3776
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 3 deletions.
31 changes: 29 additions & 2 deletions WDL/runtime/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ def run_local_task(
# downstream tasks
_delete_work(cfg, logger, container, True)
chmod_R_plus(run_dir, file_bits=0o660, dir_bits=0o770)
_warn_output_basename_collisions(logger, outputs)

# write outputs.json
write_values_json(
Expand Down Expand Up @@ -377,7 +378,7 @@ def _eval_task_inputs(

# Map all the provided input File & Directory paths to in-container paths
container.add_paths(_fspaths(posix_inputs))
_warn_basename_collisions(logger, container)
_warn_input_basename_collisions(logger, container)

# copy posix_inputs with all File & Directory values mapped to their in-container paths
def map_paths(fn: Union[Value.File, Value.Directory]) -> str:
Expand Down Expand Up @@ -456,7 +457,7 @@ def collector(v: Value.Base) -> None:
return ans


def _warn_basename_collisions(
def _warn_input_basename_collisions(
logger: logging.Logger, container: "runtime.task_container.TaskContainer"
) -> None:
basenames = Counter(
Expand Down Expand Up @@ -957,6 +958,32 @@ def map_path_relative(v: Union[Value.File, Value.Directory]) -> str:
return Value.rewrite_env_paths(outputs, map_path_relative)


def _warn_output_basename_collisions(
    logger: logging.Logger, outputs: Env.Bindings[Value.Base]
) -> None:
    """
    Log a warning when distinct output File/Directory targets share a basename.

    Each output path that currently exists on the local filesystem is first resolved with
    os.path.realpath, so that several outputs referring to the same underlying target are
    not reported as colliding with one another. Paths that do not exist locally are compared
    as-is. Nothing is raised and ``outputs`` is not modified by the callback (it always
    returns the path it was given).

    :param logger: logger that receives the warning, if any
    :param outputs: output bindings whose File/Directory values are inspected
    """
    seen = {}  # basename -> set of distinct (resolved) targets bearing that basename

    def record(fd: Union[Value.File, Value.Directory]) -> str:
        original = fd.value
        resolved = os.path.realpath(original) if os.path.exists(original) else original
        name = os.path.basename(resolved)
        if name not in seen:
            seen[name] = set()
        seen[name].add(resolved)
        # this pass only observes paths; hand back the value unchanged
        return original

    # NOTE(review): rewrite_env_paths is used here purely as a traversal -- presumably it
    # invokes the callback on every File/Directory in outputs; its result is discarded.
    Value.rewrite_env_paths(outputs, record)

    colliding = []
    for name, distinct_targets in seen.items():
        if len(distinct_targets) > 1:
            colliding.append(name)
    if not colliding:
        return

    message = _(
        "multiple output files share the same basename; while miniwdl supports this,"
        " consider modifying WDL to ensure distinct output basenames",
        basenames=colliding,
    )
    logger.warning(message)


def _delete_work(
cfg: config.Loader,
logger: logging.Logger,
Expand Down
9 changes: 8 additions & 1 deletion WDL/runtime/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,13 @@
from typing import Optional, List, Set, Tuple, NamedTuple, Dict, Union, Iterable, Callable, Any
from contextlib import ExitStack
from .. import Env, Type, Value, Tree, StdLib, Error
from .task import run_local_task, _fspaths, link_outputs, _add_downloadable_defaults
from .task import (
run_local_task,
_fspaths,
link_outputs,
_add_downloadable_defaults,
_warn_output_basename_collisions,
)
from .download import able as downloadable, run_cached as download
from .._util import (
write_atomic,
Expand Down Expand Up @@ -1017,6 +1023,7 @@ def _workflow_main_loop(
# process outputs through plugins
recv = plugins.send({"outputs": outputs})
outputs = recv["outputs"]
_warn_output_basename_collisions(logger, outputs)

# write outputs.json
write_values_json(
Expand Down

0 comments on commit 1bc3776

Please sign in to comment.